Graphs of Turnout

#Read in voter hstory data
vhist <- read_csv("data/2017_CO/VHist_2017/CO_2017_VHist_full.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_integer(),
##   VOTER_ID = col_integer(),
##   ELECTION_TYPE = col_character(),
##   ELECTION_DATE = col_character(),
##   ELECTION_DESCRIPTION = col_character(),
##   VOTING_METHOD = col_character(),
##   PARTY = col_character(),
##   COUNTY_NAME = col_character()
## )

Problems in VHist I did not initially find

summary(as.factor(vhist$VOTING_METHOD))
##     Absentee Carry      Absentee Mail       Early Voting 
##             106196           13258152            1224179 
## Early Voting - DRE          In Person    In Person - DRE 
##             418364             187168             160042 
##        Mail Ballot  Mail Ballot - DRE      Polling Place 
##            8397030               2840            8746351 
##        Vote Center  Vote Center - DRE               NA's 
##             231354             181178            1269812
vhist_na <- vhist %>%
  filter(is.na(VOTING_METHOD))

summary(as.factor(vhist_na$COUNTY_NAME))
##      Adams    Alamosa  Archuleta    Boulder    Chaffee    Conejos 
##         11          1          2          6          5          2 
##      Delta    Douglas     Gilpin   Gunnison    Jackson  Jefferson 
##          1          2        104         79        915    1264424 
##      Kiowa Kit Carson   La Plata       Lake Las Animas      Logan 
##        382          4         93       1458         18          7 
##    Mineral     Moffat  Montezuma     Morgan      Otero      Ouray 
##         10          5         48          4          4          3 
##       Park   Phillips     Pitkin Rio Blanco Rio Grande   Saguache 
##          1        114          2          4          1         13 
## San Miguel     Summit     Teller       Yuma 
##       2081          1          6          1
summary(as.factor(vhist$COUNTY_NAME))
##       Adams     Alamosa    Arapahoe   Archuleta        Baca        Bent 
##     2053966       96137     3709094       86775       36137       31366 
##     Boulder  Broomfield     Chaffee    Cheyenne Clear Creek     Conejos 
##     2689444      326236      133980       23503       82810       68632 
##    Costilla     Crowley      Custer       Delta      Denver     Dolores 
##       38063       27871       45622      229703     4287463       18342 
##     Douglas       Eagle     El Paso      Elbert     Fremont    Garfield 
##     1940459      248526     3567938      192444      291829      313742 
##      Gilpin       Grand    Gunnison    Hinsdale    Huerfano     Jackson 
##       40622      110281       99259       10511       57919       15082 
##   Jefferson       Kiowa  Kit Carson    La Plata        Lake     Larimer 
##     4558186       17634       51812      346228       43923     2287320 
##  Las Animas     Lincoln       Logan        Mesa     Mineral      Moffat 
##      106573       44428      142804     1142624        9986      102418 
##   Montezuma    Montrose      Morgan       Otero       Ouray        Park 
##      156604      288434      143654      114665       49152      122798 
##    Phillips      Pitkin     Prowers      Pueblo  Rio Blanco  Rio Grande 
##       44681      130499       81201     1038130       51655       75302 
##       Routt    Saguache    San Juan  San Miguel    Sedgwick      Summit 
##      168492       40928        7771       63924       27954      170536 
##      Teller  Washington        Weld        Yuma 
##      150301       39491     1411792       77010
summary(as.factor(year(mdy(vhist_na$ELECTION_DATE))))
##   1988   1990   1991   1992   1993   1994   1995   1996   1997   1998 
##  78288 104370  12150 136783  53392  97355  52841 155341  65252 142150 
##   1999   2000   2001   2002   2003   2004   2005   2006   2007   2008 
##  80369 211508  74738    446    118   1267    328   2147    938     31
summary(as.factor(vhist_na$ELECTION_TYPE))
## Coordinated     General     Primary      Recall     Special 
##      327941      691002      236446        2272       12151

There appear to be several different counties here that have NA’s in the Voting Method column. Most NAs are in Jefferson. Admittedly, that is the most populous of counties, but it is still about 1 in 40 voters! At first glance, the number of NAs rises exponentially with the totals of voters per county.

Every single NA is concentrated before 2013, when it could be reasonaby supposed that at least local elections were conducted through VBM. The vast, VAST majority are before 2002.

The distribution of NAs across election types seems fairly consistent with the total amount of votes cast in each contest type. This is reasonable, since it suggests that counting method is independent of election type.

Returning to the Graphs

#Moving on with code as if problems were not there
#Sorting out voting methods
vhist <- vhist %>%
  select(3:6, 8)

vhist$VOTING_METHOD[vhist$VOTING_METHOD == "Early Voting - DRE"] <- "Early Voting" 
vhist$VOTING_METHOD[vhist$VOTING_METHOD == "In Person - DRE"] <- "In Person"
vhist$VOTING_METHOD[vhist$VOTING_METHOD == "Mail Ballot - DRE"] <- "Mail Ballot" 
vhist$ELECTION_DATE <- mdy(vhist$ELECTION_DATE)
    #More needed, don't know what ones to cut, will do the following##

#Create turnouts file with raw vote counts
turnouts <- vhist %>%
  mutate(count = 1) %>%
  group_by(COUNTY_NAME, ELECTION_DESCRIPTION, ELECTION_TYPE, ELECTION_DATE, VOTING_METHOD) %>%
  summarize(total_votes = sum(count)) %>%
  spread(key = VOTING_METHOD, value = total_votes) %>%
  filter(year(ELECTION_DATE) >= 1992)

Number of registered voters, under some assumptions

vrf <- read_csv("data/2017_CO/VRF_2017/CO_2017_VRF_full.csv", 
    col_types = cols_only(VOTER_ID = col_guess(), COUNTY = col_guess(), REGISTRATION_DATE = col_guess(), EFFECTIVE_DATE = col_guess(), VOTER_STATUS = col_guess()))

vrf$REGISTRATION_DATE <- mdy(vrf$REGISTRATION_DATE)
vrf$EFFECTIVE_DATE <- mdy(vrf$EFFECTIVE_DATE)

vrf <- vrf %>%
  filter(year(REGISTRATION_DATE) > 1910 && year(EFFECTIVE_DATE) > 1910) %>%
  filter(VOTER_STATUS == "Active")

col_county_registrants <- vrf %>%
  mutate(count = 1, YEAR_REGISTERED = year(REGISTRATION_DATE)) %>%
  group_by(COUNTY, YEAR_REGISTERED) %>%
  summarize(REGISTERED_IN_YEAR = sum(count))

col_county_registrants <- col_county_registrants %>%
  group_by(COUNTY) %>%
  mutate(REGISTRANTS = cumsum(REGISTERED_IN_YEAR)) %>%
  filter(YEAR_REGISTERED >= 1992)

Actual Graph Creation Step

#Graphwith test data
years <- data.frame(as.integer(seq(1990, 2017, 1)), runif(28, 1, 30))

names(years) <- c("year", "dat")

ggplot(years, aes(x = year, y = dat)) +
  geom_vline(xintercept = 1992, col = "red") +
  geom_vline(xintercept = 2008, col = "red") +
  geom_vline(xintercept = 2013, col = "red") +
  geom_smooth() +
  geom_point()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Maps

col <- map_data("county") %>%
  filter(region == 'colorado')

county_registrants <- vrf %>%
  mutate(count = 1) %>%
  group_by(COUNTY) %>%
  summarise(TOTAL_REGISTERED = sum(count))

county_registrants$COUNTY <- tolower(county_registrants$COUNTY) # matching string

col_reg <- merge(col, county_registrants, by.x = "subregion", by.y = "COUNTY")

col_reg$pop_cat <- cut(col_reg$TOTAL_REGISTERED, breaks = c(seq(0, 500000, by = 100000)), labels=1:5)

p <- col_reg %>%
  group_by(group) %>%
  plot_ly(x = ~long, y = ~lat, color = ~pop_cat, colors = c('pink', 'dark red'),
          text = ~subregion, hoverinfo = 'text') %>%
  add_polygons(line = list(width = 0.4)) %>%
  add_polygons(
    fillcolor = 'transparent',
    line = list(color = 'black', width = 0.5),
    showlegend = FALSE, hoverinfo = 'none'
  ) %>%
  layout(
    title = "Colorado Total Registrants by County",
    titlefont = list(size = 10),
    xaxis = list(title = "", showgrid = FALSE,
                 zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(title = "", showgrid = FALSE,
                 zeroline = FALSE, showticklabels = FALSE)
  )

p